# File:     JOP_RR1_financial_topics.R
# Purpose:  This script applies Nicolo's financial topics to the data
# Input:    ./data/finalData.RData
# Output:   ./output/figures/financialTopics.pdf
#           ./output/tables/financialTopics_interaction.tex
# Author:   JB


# ---- Setup ----
# Start from an empty workspace so objects left over from an earlier session
# cannot leak into the analysis.
rm(list = ls())

# library() stops immediately when a package is missing; require() would only
# return FALSE and let the script fail later with a less obvious error.
library(tidyverse)
library(ggridges)

# Every path below is relative to the replication folder. Switch to the
# author's local copy when it is available; on any other machine keep the
# current working directory instead of aborting, so the script can be run
# from inside the replication folder.
tryCatch(
  setwd('C:/Users/Jimbo/Dropbox/FED/FED/Paper/JOP/RR1_replication/'),
  error = function(e) {
    message('Replication folder not found; using working directory: ', getwd())
  }
)

# Restores the saved objects; the code below reads `utterance_level`, which is
# presumably provided by this file.
load('./data/finalData.RData')

# Keyword dictionaries: one regular expression per topic, written as a list of
# alternatives separated by `|`. Every pattern is lower-cased while the list is
# built, and the list names identify the topics.
# NOTE(review): the patterns carry no word boundaries, so when they are used
# for plain regex matching, short keywords (e.g. 'war', 'ire', 'sure') can also
# match inside longer words such as 'toward', 'require' or 'measure' -- confirm
# that this is intended.
dict <- lapply(
  list(
    TOPIC_price_stability =
      'price(s)*|inflate|inflation|inflationary|HICP|CPI|PCE|PCE index|PCE inflation|deflation|deflator|deflationary|deflate|hyperinflation|hyperinflationary',
    TOPIC_financial_stability =
      'financial (in)*stability|bank (in)*stability|(financial )*crisis|financial stress|financial risk|systemic risk|contagion|financial shocks|bubble|financial imbalance|misalignment|credit growth|banks|insurers|hedge funds|investment funds|financial markets|securities markets|leverage|capital|derivatives|off-balance sheet exposures|special purpose vehicles|off-balance sheet vehicles|payment systems|settlement systems|central securities depositories|non-performing loans|npls|non-performing exposures|foreign currency loans|correlated exposures',
    TOPIC_employment =
      'employ(ee|er)*|(un)*employment|underemployment|firing|fixed-term|full-time|part-time|inactivity|job(s)*|jobless|labo(u)*r|labo(u)*r force|labo(u)*r market|self-employed|temporary|vacanc|work(er)*|workers|working|working( age| time)*|works',
    TOPIC_international_developments =
      'Trade|International|Global|Cross-border|Emerging markets|Emerging economies|Outside the euro area|Outside the EU|Geopolitic|China|Chinese|Lehman|United States|The US|USA|America|Canada|Canadian|Japan|Japanese|Russia|Russian|India|Indian|Turkey|Turkish|Argentina|Argentinian|Brexit|United Kingdom|England|Norway|Norwegian|Enlargement|Developing economies|Developing countries|World Bank|IMF|War|Middle East|Far East|OPEC|WTO|Exchange rate|Sweden|Swedish|Oil|Gas|Commodity|G7|G20|Korea|Korean|Northern Rock|Terrorism|Terrorist|Africa|African|Asia|Australia|Oversea|External representation|IRE|Dollar|Pound|Ruble|Yuan|Yen|Renminbi|LTCM|External demand|Exports|Imports|Advanced economies|Value chain|US Treasuries|Fed|Federal Reserve|Bank of England|Scotland|Scottish|PBOC|Basel|Bank of International Settlements|BIS|Washington|New York',
    TOPIC_payments_issues =
      'Payment|Payment systems|CCP|Clearing|Market infrastructures|Digital euro|Wholesale transactions|Bitcoin|Stablecoins|Libra|Diem|Instant payments|CBDC|Cash|Banknotes|Coins|Card|E-money|Private money|Central bank money|Digital dollar|TARGET|TARGET2|T2S|real-time gross settlement|SEPA|TIPS|Payment|Settlement|DLT|Ledger|Blockchain|Token|Digital currency|Cryptocurrencies|Crypto-currencies|Crypto-assets|Cryptoassets|Big tech firms|Big techs',
    TOPIC_EMU_governance =
      'Fiscal policy|Fiscal rules|Fiscal Board|Bailout|Bail-in|Single Supervisory Mechanism SSM|Single Resolution Mechanism|SRM|Banking supervision|Microprudential|Macroprudential|Prudential policies|Macroeconomic policies|Five Presidents|Four Presidents|Economic and Monetary Union|EMU|EU budget|Multiannual financial framework|MFF|SURE|Stability and Growth Pact|SGP|Stability and growth|Banking Union|Deposit insurance|EDIS|NGEU|Next Generation|Recovery and Resilience|Fiscal capacity|BICC|Risk-sharing|Transfer union|Policy mix|International role of the euro|IRE|Moral hazard|Financial assistance|Troika|European Stability Mechanism|ESM|Corrective arm|Budget|Capital Markets Union|CMU|Integration|Deepening|Country Specific Recommendations|CSRs|Euro adoption|Changeover|Convergence|Divergence',
    TOPIC_environment =
      'green|climate|climate change|green bond|sustainable finan|wildfire|hurricane|natural disaster|emission|co2|carbon dioxide|fossil fuel|pollut|greenwashing|carbon|brown|harm|fossil|fuel|environmental|environment|transition|ecology|ecological|taxonomy|greening|pollution|polluting|biodiversity|emissions|weather'
  ),
  tolower
)

# Add one logical column per topic to `utterance_level`: TRUE when the
# utterance's `textclean` matches that topic's pattern. The column is named
# after the topic's entry in `dict`.
for (topicName in names(dict)) {
  utterance_level[[topicName]] <- grepl(
    pattern = dict[[topicName]],
    x = utterance_level$textclean
  )
}

# ---- Figure: share of utterances mentioning each topic over time ----
# The plot is built before the PDF device is opened, so an error in the data
# pipeline cannot leave a dangling graphics device behind.
topicTrendPlot <- utterance_level %>%
  group_by(chamber, date, yellenTime) %>%
  # Mean of a logical topic flag = proportion of flagged utterances per cell.
  summarise_at(vars(matches('TOPIC_[a-z]')), mean) %>%
  ungroup() %>%
  gather(topic, prop, -chamber, -date, -yellenTime) %>%
  # 'TOPIC_price_stability' -> 'Price Stability'; restore the 'EMU' acronym
  # that title-casing turns into 'Emu'.
  mutate(topic = gsub('Emu', 'EMU',
                      str_to_title(gsub('_', ' ', gsub('TOPIC_', '', topic))))) %>%
  # Three periods used to fit separate trend lines: before 2014 ('Pre'),
  # rows flagged by `yellenTime` ('Yellen'), and everything else ('Post').
  mutate(yellenTime = ifelse(date < as.Date('2014-01-01'), 'Pre',
                             ifelse(yellenTime, 'Yellen', 'Post'))) %>%
  # The EMU governance topic is left out of the figure.
  filter(!grepl('EMU', topic)) %>%
  ggplot(aes(x = date, y = prop, group = yellenTime)) +
  geom_point() +
  # Shade and label the 2014-2018 window.
  annotate(geom = 'rect',
           xmin = as.Date('2014-01-01'), xmax = as.Date('2018-01-01'),
           ymin = -Inf, ymax = Inf,
           alpha = .2, fill = 'grey50') +
  geom_vline(xintercept = as.Date(c('2014-01-01', '2018-01-01'))) +
  annotate(geom = 'text', x = as.Date('2016-01-01'), y = Inf, label = 'Yellen',
           vjust = 1) +
  # One linear trend per period (grouping comes from `group = yellenTime`).
  geom_smooth(method = 'lm', se = FALSE) +
  labs(x = 'Date', y = 'Proportion of utterances',
       title = 'Financial Topic Analysis',
       subtitle = 'Keyword-based topic identification') +
  facet_wrap(~topic, scales = 'free')

# Make sure the target folder exists, then draw the plot explicitly: top-level
# auto-printing does not happen when the script is run through source(), which
# would otherwise produce an empty PDF.
dir.create('./output/figures', recursive = TRUE, showWarnings = FALSE)
pdf('./output/figures/financialTopics.pdf', width = 8, height = 5)
print(topicTrendPlot)
dev.off()

# Exploratory plot (shown on the active device, not saved): for utterances
# flagged with a topic, the mean of `interrupted` by topic and speaker group.
# Point size shows how many flagged utterances each point is based on.
utterance_level %>%
  mutate(
    # Speaker group from the ID string: 'YELLEN' takes precedence over 'FED'.
    spkr = case_when(
      grepl('YELLEN', opensecretsID) ~ 'Yellen',
      grepl('FED', opensecretsID) ~ 'FED',
      TRUE ~ 'Others'
    )
  ) %>%
  select(date, ind, spkr, interrupted, matches('TOPIC_[a-z]')) %>%
  # Long format: one row per utterance and topic, `prop` holds the topic flag.
  gather(key = topic, value = prop, -date, -ind, -spkr, -interrupted) %>%
  # Only flagged utterance-topic pairs contribute to the plot.
  filter(prop) %>%
  group_by(topic, prop, spkr) %>%
  summarise(
    interrupted = mean(interrupted),
    n = n()
  ) %>%
  ggplot(
    aes(
      x = interrupted,
      y = reorder(topic, interrupted),
      color = spkr,
      size = n
    )
  ) +
  geom_point()


# Long-format analysis data: one row per utterance and topic, with the topic
# flag stored in `prop` and the speaker group as a factor ordered
# Others < FED < Yellen.
toAnal <- utterance_level %>%
  mutate(
    # Speaker group from the ID string: 'YELLEN' takes precedence over 'FED'.
    spkr = case_when(
      grepl('YELLEN', opensecretsID) ~ 'Yellen',
      grepl('FED', opensecretsID) ~ 'FED',
      TRUE ~ 'Others'
    ),
    spkr = factor(spkr, levels = c('Others', 'FED', 'Yellen'))
  ) %>%
  select(date, ind, spkr, interrupted, matches('TOPIC_[a-z]')) %>%
  gather(key = topic, value = prop, -date, -ind, -spkr, -interrupted)

# ---- Interruption models ----
# library() fails fast when fixest is missing; require() would only return
# FALSE and let feols() fail with "could not find function".
library(fixest)

# Estimation sample shared by the three models: utterance-topic rows where the
# topic is flagged, excluding the EMU governance topic. Stripping 'TOPIC'
# leaves labels such as '_employment'; employment and 'Others' are the
# reference categories.
interruptSample <- toAnal %>%
  filter(prop, !grepl('EMU', topic)) %>%
  mutate(topic = relevel(factor(gsub('TOPIC', '', topic)), ref = '_employment'),
         spkr = relevel(factor(paste0('_', spkr)), ref = '_Others'))

# All models include date fixed effects (the `| date` part of the formula).
# m:  topic only; m2: topic + speaker group; m3: topic x speaker interaction.
summary(m <- feols(interrupted ~ topic | date, interruptSample))

summary(m2 <- feols(interrupted ~ topic + spkr | date, interruptSample))

summary(m3 <- feols(interrupted ~ topic*spkr | date, interruptSample))

# Export the regression table. `replace = TRUE` overwrites an existing file;
# by default etable() appends, so every re-run of the script would add another
# copy of the table to the .tex file.
dir.create('./output/tables', recursive = TRUE, showWarnings = FALSE)
etable(m, m2, m3,
       signif.code = c('***' = .001, '**' = .01, '*' = .05, '\\dag' = .1),
       file = './output/tables/financialTopics_interaction.tex',
       replace = TRUE)


# Topic-mention model: regress the topic flag (`prop`) on topic, speaker group
# and their interaction, with date fixed effects. All utterance-topic rows are
# used except the EMU governance topic; reference categories are employment
# and FED.
m4 <- feols(
  fml = prop ~ topic*spkr | date,
  data = toAnal %>%
    filter(!grepl('EMU', topic)) %>%
    mutate(topic = relevel(factor(gsub('TOPIC', '', topic)),
                           ref = '_employment')) %>%
    mutate(spkr = relevel(factor(paste0('_', spkr)),
                          ref = '_FED'))
)
summary(m4)


# Number of flagged utterances per topic, smallest first.
toAnal %>%
  filter(prop) %>%
  count(topic) %>%
  arrange(n)

# library() fails fast when marginaleffects is missing; require() would only
# return FALSE and let the plot calls fail with "could not find function".
library(marginaleffects)

# Plots from m4: predictions by topic and speaker group, then the effect of
# topic conditional on speaker group.
# NOTE(review): plot_cap() / plot_cme() appear to have been renamed to
# plot_predictions() / plot_slopes() in later marginaleffects releases --
# confirm against the installed version before re-running.
plot_cap(m4, condition = c('topic', 'spkr'))
plot_cme(m4, variables = 'topic', condition = 'spkr')
